
/* Session options: left-align procedure output (nocenter), do not cap the
   number of observations processed (obs=max), compress newly created data
   sets (compress=yes), and write macro-generated statements to the log
   (mprint). */
options nocenter obs=max compress=yes mprint;

/* Library references used throughout this program:
     raw      - source register views, read as raw.<register>v
     formater - lookup tables (formater.uddan_2014_audd is read below)
     home     - destination for the final data sets                      */
libname raw 'H:\rawdata\704351\Views';
libname formater 'D:\Formater\forskerformater';
libname home 'J:\Workdata\704351\Amalie\Networks\JOP Publication Files\Data';


/************ Macros ******************/

/*
 * _getdata_core: shared implementation behind %getdata and %getdata2.
 *
 *   data   - register prefix; the macro reads raw.<data>v and filters on
 *            the variable <data>sourceyear
 *   start  - first source year to include (inclusive)
 *   end    - last source year to include (inclusive)
 *   offset - number of years subtracted from <data>sourceyear to form AAR
 *
 * The list of variables to keep is taken from the macro variable VAR,
 * which the caller must set with %let before invoking the macro.
 * The result is written to a data set named <data><start><end>, in which
 * <data>sourceyear is replaced by AAR and PNR is replaced by a numeric copy.
 */
%macro _getdata_core(data,start,end,offset);
%local out yearvar;
%let out = &data&start&end;
%let yearvar = &data.sourceyear;

/* Step 1: pull the requested years and variables from the raw view. */
data &out (keep = &var);
set raw.&data.v (where= (&yearvar>=&start & &yearvar<=&end));
run;

/* Step 2: derive AAR and swap PNR for a numeric copy. The "+ 0" yields a
   numeric value whether PNR is stored as character or numeric. If PNR was
   not kept in step 1, the assignment creates it as missing. */
data &out;
set &out;
aar = &yearvar - &offset;
drop &yearvar;
pnrny = pnr + 0;
drop pnr;
rename pnrny = pnr;
run;
%mend _getdata_core;

/*
 * getdata: extract a register where AAR equals the source year
 * (no year shift applied).
 */
%macro getdata(data,start,end);
%_getdata_core(&data,&start,&end,0)
%mend getdata;

/*
 * getdata2: to use for BEF and FAIN registers, dated by Jan 1st instead of
 * Dec 31st. AAR is set to the source year minus one. Note that the output
 * data set name still carries the unshifted start/end years.
 */
%macro getdata2(data,start,end);
%_getdata_core(&data,&start,&end,1)
%mend getdata2;


/************ Get variables from relevant registers and save data in Stata format *********************/


/******** Population data at the individual-year level with demographics, income, and labor market variables **********/

/*** BEF, FAIN, FAM and SOGN ***/
* These registers are measured by Jan 1;
* Overlap of variables in FAIN and BEF - rename FAIN-variables and keep both;

* BEF;

/* Variables to pull from the BEF register. */
%let var = 
pnr befsourceyear koen alder efalle familie_id familie_type kom bopikom opgangikom adrdato 
far_id mor_id civst fm_mark foed_dag generation ie_type opr_land;

%getdata2(bef,1986,2013); *this is end-year 1985-2012;

/* Build bef19852012 in a single pass over bef19862013:
     - yearborn : calendar year of foed_dag
     - alder    : recomputed as aar - yearborn, replacing the register value
     - mor_id / far_id : replaced by numeric copies ("+ 0" forces numeric)
   DROP= is applied before RENAME=, so the original alder, mor_id and far_id
   are removed and the new variables take over their names. The resulting
   variables and their order match the previous three-step version. */
data bef19852012 (drop   = alder mor_id far_id
                  rename = (alder_ny = alder
                            morid_ny = mor_id
                            farid_ny = far_id));
set bef19862013;
yearborn = year(foed_dag);
alder_ny = aar - yearborn;
morid_ny = mor_id + 0;
farid_ny = far_id + 0;
run;

* FAIN;

/* Variables to pull from the FAIN register. */
%let var = pnr fainsourceyear adrdato aegtefaelle_id alder bopikom civst opgangikom;

%getdata2(fain,1981,2013);

/* Copy to the end-year name and suffix every FAIN variable with _fain, so
   that it can sit next to the BEF variable of the same name after merging. */
data fain19802012;
  set fain19812013;
  rename
    adrdato        = adrdato_fain
    aegtefaelle_id = aegtefaelle_id_fain
    alder          = alder_fain
    bopikom        = bopikom_fain
    civst          = civst_fain
    opgangikom     = opgangikom_fain
  ;
run;

* Merge FAIN and BEF, keep both;

/* Both inputs must be ordered by the merge key (pnr, aar). */
proc sort data=bef19852012;
  by pnr aar;
run;

proc sort data=fain19802012;
  by pnr aar;
run;

/* Match-merge without an IN= filter: observations present in either
   input are kept. */
data beffain19802012;
  merge bef19852012
        fain19802012;
  by pnr aar;
run;


* SOGN - to be merged by KOM + OPGIKOM;

/* Keep every variable of the SOGN view. */
%let var = _ALL_;

%getdata2(sogn,1981,2013); *this is end-year 1980-2012, only available from 2002 though;

/* Prepare SOGN for the later merge:
     - kom  takes the value of kom_kode (kom_kode itself is dropped)
     - sogn is replaced by a numeric copy
     - pnr  is dropped                                              */
data sogn19802012 (drop   = kom_kode sogn pnr
                   rename = (sogn_ny = sogn));
  set sogn19812013;
  kom     = kom_kode;
  sogn_ny = sogn + 0;
run;

* FAM - to be merged by familie_id;

/* Variables to pull from the FAM register. */
%let var = familie_id famsourceyear antboernf antpersf;

%getdata2(fam,1981,2013);


/* Merge family info on BEF/FAIN */

/* FAM side: drop pnr and replace familie_id by a numeric copy. */
data fam19802012 (drop   = pnr familie_id
                  rename = (fam_ny = familie_id));
  set fam19812013;
  fam_ny = familie_id + 0;
run;

/* BEF/FAIN side: same numeric conversion of familie_id, so the merge key
   has the same type in both inputs. */
data beffain19802012 (drop   = familie_id
                      rename = (fam_ny = familie_id));
  set beffain19802012;
  fam_ny = familie_id + 0;
run;

proc sort data=fam19802012;
  by familie_id aar;
run;

proc sort data=beffain19802012;
  by familie_id aar;
run;

/* Keep only observations that come from the BEF/FAIN side. */
data beffainfam19802012;
  merge beffain19802012 (in = in_pop)
        fam19802012;
  by familie_id aar;
  if in_pop;
run;

* Merge SOGN (parish info) on BEF/FAIN/FAM;

proc sort data=sogn19802012;
  by kom opgangikom aar;
run;

proc sort data=beffainfam19802012;
  by kom opgangikom aar;
run;

/* Attach parish info; keep only observations from the BEF/FAIN/FAM side. */
data beffainfamsogn19802012;
  merge beffainfam19802012 (in = in_pop)
        sogn19802012;
  by kom opgangikom aar;
  if in_pop;
run;

/* Check data: summary statistics of sogn, koen and alder for each aar. */
proc sort data=beffainfamsogn19802012;
  by aar;
run;

proc means data=beffainfamsogn19802012;
  by aar;
  var sogn koen alder;
run;


/*** Other variables: income, occupation, education, etc. ***/
* These registers are measured by Dec 31;

* INDK;

/* Each register below is extracted with %getdata, so aar equals the source
   year. VAR is reset before every call because the macro reads it. */

%let var = pnr indksourceyear oblgaeld pantgaeld bankgaeld oblakt pantakt kursakt bankakt qsluska2 qbrukor2 qlon qkapud rentupri;
%getdata(indk,1980,2013);

/* INDH */
%let var = pnr indhsourceyear ejendomsvurdering koejd discoalle_indk disco08_alle_indk perindkialt personindk;
%getdata(indh,1980,2013);

/* IDAP */
%let var = pnr idapsourceyear arledgr pstill sstill forskat leddel ledfuld ledighed;
%getdata(idap,1980,2013);

/* UDDA - to be merged with formats */
%let var = pnr uddaupdsourceyear alm_vfra almaudd alminstnr erh_vfra erhaudd erhinstnr hf_vfra hfaudd hfinstnr ig_vfra iginstnr udd;
%getdata(uddaupd,1980,2013);

* UDDA formats;

/* Lookup side: numeric merge key built from audd. */
data uddaupdformats;
  set formater.uddan_2014_audd;
  mergeudd = audd + 0;
run;

/* Register side: numeric merge key built from hfaudd. */
data uddaupd19802013;
  set uddaupd19802013;
  mergeudd = hfaudd + 0;
run;

proc sort data=uddaupd19802013;
  by mergeudd;
run;

proc sort data=uddaupdformats;
  by mergeudd;
run;

/* Attach the lookup variables; keep only observations that come from the
   register side. */
data uddaupd19802013;
  merge uddaupd19802013 (in = in_udda)
        uddaupdformats;
  by mergeudd;
  if in_udda;
run;


* Make sure pnr is numeric;

/*
 * pnr_to_numeric: rewrite data set DS in place, replacing pnr by a numeric
 * copy (pnr + 0). The new pnr ends up as the last variable of DS.
 */
%macro pnr_to_numeric(ds);
data &ds;
set &ds;
pnrny = pnr + 0;
drop pnr;
rename pnrny = pnr;
run;
%mend pnr_to_numeric;

%pnr_to_numeric(beffainfamsogn19802012)
%pnr_to_numeric(indk19802013)
%pnr_to_numeric(indh19802013)
%pnr_to_numeric(idap19802013)
%pnr_to_numeric(uddaupd19802013)


* Sort and check for duplicates;

/* Order every input by (pnr, aar). NODUPKEY keeps one observation per key;
   the observations removed are written to the dup* data sets for review. */
proc sort data=beffainfamsogn19802012 nodupkey dupout=dupbef;
  by pnr aar;
run;

proc sort data=indk19802013 nodupkey dupout=dupindk;
  by pnr aar;
run;

proc sort data=indh19802013 nodupkey dupout=dupindh;
  by pnr aar;
run;

proc sort data=idap19802013 nodupkey dupout=dupidap;
  by pnr aar;
run;

proc sort data=uddaupd19802013 nodupkey dupout=dupuddaupd;
  by pnr aar;
run;


* Final data set;

/* Person-year file: the BEF/FAIN/FAM/SOGN population defines which
   observations are kept; income, labor market and education variables are
   attached where a matching (pnr, aar) exists. */
data home.registerdata19802013;
  merge beffainfamsogn19802012 (in = in_pop)
        indk19802013
        indh19802013
        idap19802013
        uddaupd19802013;
  by pnr aar;
  if in_pop;
run;

/* Save in Stata format */
proc export
  data=home.registerdata19802013
  outfile='J:\Workdata\704351\Amalie\Networks\JOP Publication Files\Data\registerdata19802013.dta'
  replace;
run;








/******** Population data at the individual-job-year level with job info to construct workplace network links **********/


* IDAN;

/* Keep every variable of the IDAN view. */
%let var = _ALL_;

%getdata(idan,1980,2013);

/* Store a permanent copy in HOME, then write it out as a .dta file. */
data home.idan19802013;
  set idan19802013;
run;

proc export
  data=home.idan19802013
  outfile='J:\Workdata\704351\Amalie\Networks\JOP Publication Files\Data\idan19802013.dta'
  replace;
run;









/******** Population data at the individual-year level with highest achieved degree and degree institution to construct education network links **********/


* Merge BEF and UDDA;

/* beffain19802012 was last ordered by (familie_id, aar), so it is sorted
   again on the merge key here. */
proc sort data=beffain19802012;
  by pnr aar;
run;

proc sort data=uddaupd19802013;
  by pnr aar;
run;

/* Keep only observations that come from the BEF/FAIN side. */
data befuddaupd19802012;
  merge beffain19802012 (in = in_pop)
        uddaupd19802013;
  by pnr aar;
  if in_pop;
run;

/* udda_hf: the variables needed for the education links, built in one pass.
     - hfinstnr -> instnr, hf_vfra -> graddate (renamed on output)
     - educ     : numeric copy of hfaudd, which is then dropped
   The variables and their order match the previous multi-step version. */
data udda_hf (drop   = hfaudd
              rename = (hfinstnr = instnr
                        hf_vfra  = graddate));
set befuddaupd19802012 (keep=HFINSTNR HF_VFRA hfaudd APUBL1 ATEXT H1TEKST KOMP M1TEKST PRIA U1TEKST aar afsp1e h1 m1 mergeudd pnr u1 );
educ = hfaudd + 0;
run;

/* udda: adds gradyear and the MISSING indicator in one pass.
     missing = .  no education code (educ is missing)
     missing = 1  education code present, but instnr is 999999, missing or 0
     missing = 0  education code present and instnr has any other value     */
data udda;
set udda_hf;
gradyear = year(graddate);
missing = .;
if educ^=. then do;
  if instnr=999999 or instnr=. or instnr=0 then missing=1; *Missing out of people with educ info;
  else missing=0;
end;
run;

/* Mean of the MISSING indicator for each aar. */
proc sort data=udda;
  by aar;
run;

proc means data=udda;
  by aar;
  var missing;
run;

proc sort data=udda;
  by descending missing;
run;

/* Keep only observations flagged missing = 0. */
data udda_nomiss;
  set udda;
  where missing = 0;
run;

/* NOTE(review): the second sort uses a different key order and writes to
   OUT=, so the ordering produced by this first sort does not carry over
   into udda_nomiss_u - confirm whether this first sort is still needed. */
proc sort data=udda_nomiss;
  by pnr aar educ instnr descending gradyear;
run;

/* Keep one observation per (pnr, aar, instnr, educ, gradyear); the removed
   duplicates go to udda_nomiss_dup. */
proc sort data=udda_nomiss nodupkey
          out=udda_nomiss_u
          dupout=udda_nomiss_dup;
  by pnr aar instnr educ gradyear;
run;

/* Store permanently, with aar renamed to year. */
data home.udda_final;
  set udda_nomiss_u (rename = (aar = year));
run;

proc export
  data=home.udda_final
  outfile='J:\Workdata\704351\Amalie\Networks\JOP Publication Files\Data\udda_final.dta'
  replace;
run;



